## Caraway, Rickard, and Anner 2012 IO

library(foreign) 
library(Amelia)

## Read the replication data (Stata .dta file) and take a first look:
## the leading rows, then the (rows, columns) dimensions.
cra2012 <- read.dta(file = "CRA2012 IO Rep Data.dta")
head(cra2012)
dim(cra2012)

## How many variables? 16: no reduction necessary

## Imputation
## What is average percentage of missing data?
## Count the missing values in every column of `x`.
##
## x: a data frame or matrix.
## Returns an unnamed integer vector with one NA count per column,
## in column order.
NAs <- function(x) {
    na_flags <- is.na(x)
    ## as.integer() drops the column names and keeps the counts integer-typed
    as.integer(colSums(na_flags))
}
## Per-column NA counts, then the mean share of missing values across
## columns, expressed in percent.
NAs(cra2012)
100 * mean(NAs(cra2012) / nrow(cra2012))

## Thus: 20 imputations

## Logical bounds for the imputation, one row per bounded variable in the
## form (column index, lower bound, upper bound).
## Column 3 is taken to be mefp -- confirm against the head() output below;
## its imputed values are held within [0, 100].
head(cra2012)
bds <- rbind(c(3, 0, 100))
bds

## Fix the RNG state so the imputations are reproducible
## (R reads the literal 02138 as the number 2138).
set.seed(02138)

## Multiple imputation with Amelia on the country-year panel.
cra2012.out <- amelia(
    cra2012,
    m = 20,                          # number of imputed datasets
    ts = "year",                     # time index
    cs = "ccode",                    # cross-section (unit) index
    polytime = 3,                    # cubic polynomial of time
    lags = c(
        "mefp",
        "arr",
        "lnewplp",
        "iltenpolity2_lnewplp"
    ),                               # variables whose lags enter the model
    empri = 0.01 * nrow(cra2012),    # ridge prior: 1% of the row count
    bounds = bds,                    # logical bounds defined above
    max.resample = 1000              # redraw limit when enforcing bounds
)

## Save the imputations in Stata format; separate = FALSE requests a single
## output file rather than one file per imputed dataset.
write.amelia(
    obj = cra2012.out,
    file.stem = "CRA2012 IO Imp Data",
    format = "dta",
    separate = FALSE
)